#Code to create measure of dyadic representation
#Created by Mac Lockhart, Feb 16 2023

library(tidyverse)
library(readr)
library(fuzzyjoin)
library(car)
library(stringr)
library(maps)
library(data.table)
setwd(rprojroot::find_root(rprojroot::is_rstudio_project))

# Read the merged CCES / roll-call table (fast reader, then convert to tibble).
df <- tibble(fread("Intermediate Data/merged_cces_rollcall.csv"))

# Read the vote-link spreadsheet and keep only rows that have a data_num.
link <- readxl::read_xlsx("Intermediate Data/policy_outcomes_and_notes.xlsx") %>%
  filter(!is.na(data_num))

# Accumulators that the loop below widens one column at a time. Both start
# as a one-column tibble holding df$X, which is used later as the join key.
results <- tibble(df$X)
MCs <- results

# Vote identifiers V1 ... V111, matched against link$variable_name_house.
votes <- paste0("V", seq_len(111))
# For every (House vote, data_year) pair in `link` that maps to exactly one
# data_num, append two columns:
#   * results: <vote><year>     - does the recorded vote equal the
#                                 respondent's own view (rcv<data_num>_r)?
#   * MCs:     <vote><year>_MC  - the Rep<k> value belonging to the slot
#                                 whose vote was used.
# Each respondent row has up to six slots: vote columns "<k>_<vote>.hr"
# paired with "Rep<k>", k = 1..6. The first slot with a non-missing vote wins.
for (vote in votes) {
  # Years in which `link` lists this vote.
  years <- unique(link[link$variable_name_house == vote, ]$data_year)
  years <- years[!is.na(years)]

  for (year in years) {
    rcv <- link[link$variable_name_house == vote & link$data_year == year &
                  !is.na(link$data_num), ]$data_num
    rcv <- rcv[!is.na(rcv)]

    # Only proceed when the vote/year pair maps to a single data_num.
    if (length(rcv) == 1) {
      vote_cols <- paste0(1:6, "_", vote, ".hr")
      # Respondent's view column; rcv is the CCES vote number.
      view <- paste0("rcv", rcv, "_r")

      # Which of the six slots actually holds a recorded vote, per row.
      has_vote <- lapply(vote_cols, function(col) !is.na(df[, col]))

      # Agreement between each slot's vote and the respondent's view
      # (NA where the slot has no vote), then take the first non-missing slot.
      agree <- lapply(seq_along(vote_cols), function(k) {
        ifelse(has_vote[[k]], df[, vote_cols[k]] == df[, view], NA)
      })
      result <- coalesce(!!!agree)
      rm(agree)

      # Same slot selection, but keep the Rep<k> value of the slot used.
      member <- lapply(seq_along(vote_cols), function(k) {
        ifelse(has_vote[[k]], df[[paste0("Rep", k)]], NA)
      })
      MC <- coalesce(!!!member)
      rm(member, has_vote)

      # Blank out rows that belong to a different survey year.
      other_year <- df$year != year
      result[other_year] <- NA
      MC[other_year] <- NA

      result <- tibble(result)
      colnames(result) <- paste0(vote, year)
      MC <- tibble(MC)
      colnames(MC) <- paste0(vote, year, "_MC")

      MCs <- cbind(MCs, MC)
      results <- cbind(results, result)

      # Progress output: number of matching rows for this vote/year.
      print(sum(result == 1, na.rm = TRUE))
    }
    print(year)
  }
}

# Convert the accumulated columns back to a tibble and add `Total`: the row
# sum over the vote columns V12009 through V1092010, ignoring NAs.
results <- tibble(results)
results <- results %>%
  mutate(
    Total = rowSums(select(., `V12009`:`V1092010`), na.rm = TRUE)
  )

# Cross-tabulate the totals against survey year as a sanity check.
table(results$Total, df$year)

# Show the columns that contain at least one non-missing value.
# The value is not assigned, so `results` itself is unchanged.
select(results, where(function(col) any(!is.na(col))))

# Outcomes: wide -> long, one row per (X, vote).
DT <- as.data.table(results)
# Keep only columns with at least one non-missing value.
has_data <- vapply(DT, function(col) !all(is.na(col)), logical(1))
DT <- DT[, which(has_data), with = FALSE]
data_long <- gather(
  DT, vote, outcome, `V12009`:`V1092010`,
  factor_key = TRUE
)
# Columns left after gathering are, in order: the df$X key, Total, then the
# new key/value pair.
colnames(data_long) <- c("X", "total", "vote", "outcome")

# Member columns: same wide -> long reshape, one row per (X, vote).
DT_MC <- as.data.table(MCs)
has_data_MC <- vapply(DT_MC, function(col) !all(is.na(col)), logical(1))
DT_MC <- DT_MC[, which(has_data_MC), with = FALSE]
data_long_MC <- gather(
  DT_MC, vote, outcome, `V12009_MC`:`V1092010_MC`,
  factor_key = TRUE
)
colnames(data_long_MC) <- c("X", "vote", "MC")
# Strip the trailing three characters (the "_MC" suffix added in the loop) so
# the vote labels line up with those in data_long.
data_long_MC$vote <- gsub('.{3}$', '', data_long_MC$vote)

# Attach the member value to each (X, vote) outcome row.
data_long <- left_join(data_long, data_long_MC, by = c("X", "vote"))

# Lookup from the respondent view column name ("rcv<data_num>_r") to the
# vote/year label used in the results columns ("<variable_name_house><year>").
merge <- mutate(
  link,
  resp_vote_ID = paste0("rcv", data_num, "_r"),
  dat_vote_ID = paste0(variable_name_house, data_year)
)
merge <- select(merge, resp_vote_ID, dat_vote_ID, data_year)

# Respondent views in long form: one row per (X, year, rcv column).
views <- select(df, ends_with("_r"), "X", "year")
views <- pivot_longer(
  views,
  cols = starts_with("rcv"),      # every column whose name starts with "rcv"
  names_to = "resp_vote_ID",      # former column name
  values_to = "respondent_view"   # value held in that column
)
# Attach the vote/year label by view column name and survey year, then drop
# rows where the respondent gave no view.
views <- left_join(
  views, merge,
  by = c("resp_vote_ID" = "resp_vote_ID", "year" = "data_year")
)
views <- filter(views, !is.na(respondent_view))



# Respondent-level variables, picked by column position in df
# (columns 1-15 and 37-47).
voter_info <- df[, c(1:15, 37:47)]

# One row per (X, vote) with the respondent-level variables attached.
x <- data_long %>%
  left_join(voter_info, by = "X")

# Vote label up to the first underscore; used as the key into `views`.
x$dat_vote_ID <- str_extract(x$vote, "^[^_]+")
gc()

# Keep every row of `views`, bringing along the matching outcome rows.
x <- right_join(x, views, by = c("X", "dat_vote_ID"))

# A respondent_view of 8 is treated as no usable outcome.
x$outcome <- with(x, ifelse(respondent_view == 8, NA, outcome))

# write_csv2 produces a semicolon-delimited file with comma decimal marks.
write_csv2(x, "Intermediate Data/house_vote_level_data.csv")
